import time
from tqdm import tqdm
import json
import jsonlines
import numpy as np
from fuzzywuzzy import fuzz
import spacy
nlp = spacy.load("en_core_web_sm")

def jaccard_metric(prediction, result):
    """Return the Jaccard index of two binary indicator sequences.

    Both arguments are interpreted element-wise as booleans (non-zero means
    "selected"). The score is the number of positions selected in both
    inputs divided by the number of positions selected in at least one.

    Args:
        prediction: Array-like of predicted indicators.
        result: Array-like of reference indicators, same shape as
            ``prediction`` (or broadcastable to it).

    Returns:
        A float in ``[0.0, 1.0]``. When neither input selects anything the
        union is empty; the two selections are then identical, so 1.0 is
        returned instead of dividing by zero.
    """
    union_count = np.count_nonzero(np.logical_or(prediction, result))
    if union_count == 0:
        # Empty union: both selections are empty and therefore equal.
        return 1.0
    insec_count = np.count_nonzero(np.logical_and(prediction, result))
    return float(insec_count) / float(union_count)

if __name__ == "__main__":
    # Score each record of the JSONL file: a candidate is marked as predicted
    # when its explanation text occurs inside some sentence of the record's
    # "gpt4" text; the prediction vector is then compared with the record's
    # "labels" via the Jaccard index. The running mean is shown on the
    # progress bar.
    jaccard_score = 0.0
    count = 0
    # The progress-bar total is hard-coded per split:
    # 5_6: (260); 10_12: (85); swow: (84)
    # The reader is opened in a `with` so the file handle is always closed.
    with jsonlines.open('./gpt4_ans/winogavil/casenum_icl/8/swow/test.jsonl', mode='r') as dataset, \
            tqdm(desc='Process', unit='it', total=84) as pbar:
        for line in dataset.iter():
            captions = line["explanations"]
            chatgpt = line["gpt4"]
            # One slot per label; stays 0 unless the caption is found below.
            predictions = np.zeros_like(line['labels'])
            paragraph = f"""
                    {chatgpt}
                    """
            doc = nlp(paragraph.lower())
            # Collect the sentence texts once instead of re-walking
            # doc.sents for every caption.
            sentences = [sent.text for sent in doc.sents]
            for idx, caption in enumerate(captions):
                needle = caption.lower()
                if any(needle in sentence for sentence in sentences):
                    predictions[idx] = 1

            predicted = [int(i) for i in predictions]
            score = jaccard_metric(predicted, line['labels'])
            jaccard_score += score
            count += 1
            pbar.set_postfix(jaccard_score=jaccard_score / count)

            # NOTE(review): this per-record summary is assembled but never
            # written or collected anywhere in the visible code - confirm
            # whether it was meant to be dumped to an output file.
            information = {
                'images': line['images'],
                'cue': line['cue'],
                'labels': line['labels'],
                'predictions': predicted,
                'jaccard_metric': score,
                'explanations': line['explanations'],
                'gpt4': line["gpt4"],
            }
            pbar.update()



